The Halotools jackknife appears to underestimate the true variance of the statistics I am calculating. This notebook investigates that, comparing the jackknife error estimate against the sample variance over many repopulated mock realizations.


In [54]:
from matplotlib import pyplot as plt
%matplotlib inline
import seaborn as sns
sns.set()

In [55]:
# This file just makes the data for the sampler, so multiple samplers can use the exact same data.
from pearce.mocks import cat_dict
from scipy.optimize import minimize_scalar
import numpy as np
import cPickle as pickle
from os import path

In [56]:
# No HOD parameters held fixed in this run (previously experimented with f_c / logM1).
fixed_params = {}#'f_c':1.0}#,'logM1': 13.8 }# 'z':0.0}

# Select one test box / realization at scale factor 1.0 (z=0).
# NOTE(review): 'system':'sherlock' presumably selects cluster-specific paths — confirm against pearce.mocks.
boxno, realization = 0, 1
cosmo_params = {'simname':'testbox', 'boxno': boxno, 'realization': realization, 'scale_factors':[1.0], 'system': 'sherlock'}
cat = cat_dict[cosmo_params['simname']](**cosmo_params)#construct the specified catalog!

In [57]:
cat.load(1.0, HOD='zheng07')

In [58]:
# HOD parameter point under investigation; logMmin is intentionally omitted —
# it is solved for below to match a fixed number density.
emulation_point = [('logM0', 13.5), ('sigma_logM', 0.25),
                   ('alpha', 0.9), ('logM1', 13.5)]#, ('logMmin', 12.233)]

# Merge the emulation point with any fixed parameters into one HOD dict.
em_params = dict(emulation_point, **fixed_params)

In [59]:
def add_logMmin(hod_params, cat, nd=1e-4):
    """
    In the fixed number density case, find the logMmin value that matches the
    target number density given the other HOD parameters.

    :param hod_params:
        dict of the HOD parameters besides logMmin. Modified in place.
    :param cat:
        the catalog in question; must provide calc_analytic_nd(hod_params).
    :param nd:
        target number density to match. Defaults to 1e-4, the value that was
        previously hard-coded.
    :return:
        None. hod_params will have 'logMmin' added to it.
    """
    hod_params['logMmin'] = 13.0  # initial guess
    #cat.populate(hod_params) #may be overkill, but will ensure params are written everywhere

    def _nd_residual_sq(logMmin, hod_params):
        # Squared residual between the analytic number density at this logMmin
        # and the target; minimized below.
        hod_params.update({'logMmin': logMmin})
        return (cat.calc_analytic_nd(hod_params) - nd) ** 2

    res = minimize_scalar(_nd_residual_sq, bounds=(12, 16), args=(hod_params,),
                          options={'maxiter': 100}, method='Bounded')

    # NOTE(review): assumes the minimization succeeded; consider checking res.success
    hod_params['logMmin'] = res.x

In [60]:
add_logMmin(em_params, cat)

In [61]:
# 18 logarithmically spaced radial bins spanning 10**-1.1 to 10**1.6
# (presumably Mpc/h — units set by the catalog; TODO confirm).
r_bins = np.logspace(-1.1, 1.6, 19)
# Bin centers: arithmetic midpoint of adjacent edges.
rpoints = 0.5 * (r_bins[:-1] + r_bins[1:])  # emu.scale_bin_centers

In [62]:
from time import time

In [63]:
# Seed ONCE, before the loop. The original code reseeded with int(time())
# on every iteration, so any iterations starting within the same wall-clock
# second reused the same seed and produced identical mock realizations —
# which artificially shrinks the sample variance being measured here.
np.random.seed(int(time()))

xi_vals = []
for i in range(50):  # range works in both Python 2 and 3; original used xrange
    cat.populate(em_params)
    xi_vals.append(cat.calc_xi(r_bins))

In [ ]:
# TODO need a way to get a measurement cov for the shams
# NOTE(review): self-referential conversion (list -> ndarray). Re-running this
# cell alone is harmless, but it is only correct after the population loop —
# the cell is not idempotent with respect to a partial re-run.
xi_vals = np.array(xi_vals)

In [ ]:
# Populate one more realization and measure xi with the jackknife error
# estimate under scrutiny. NOTE(review): presumably n_sub=5 means 5 subvolumes
# per box dimension and n_rands=10 the random-to-data ratio — confirm against
# pearce.mocks / halotools docs.
cat.populate(em_params)
yjk, cov = cat.calc_xi(r_bins, do_jackknife=True, jk_args = {'n_rands':10, 'n_sub':5})

In [ ]:
#y10 = np.loadtxt('xi_gg_true_jk.npy')
#cov10 = np.loadtxt('xi_gg_cov_true_jk.npy')

#y = np.log10(y10)
# Mean correlation function over the 50 repopulated realizations.
y = xi_vals.mean(axis=0)

In [ ]:
np.sqrt(np.diag(cov))/yjk

In [ ]:
np.sqrt(np.diag(np.cov(xi_vals, rowvar = False)))/y

In [ ]:
# Jackknife measurement with its errorbars, overlaid on every individual
# realization (red, translucent) to eyeball whether the errorbars cover the scatter.
plt.errorbar(rpoints, yjk, lw=2, yerr=np.sqrt(np.diag(cov)))
for xi_sample in xi_vals:
    plt.plot(rpoints, xi_sample, color='r', alpha=0.2)
plt.xscale('log')
plt.yscale('log')

In [ ]:
np.sqrt(np.diag(cov))

In [ ]:
np.sqrt(np.diag(np.cov(xi_vals, rowvar = False)))

In [ ]: